{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"..\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
      "  from collections import Callable\n",
      "\n",
      "    You are using PySparkling of version 2.4.10, but your PySpark is of\n",
      "    version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n",
      "`formatargspec` is deprecated since Python 3.5. Use `signature` and the `Signature` object directly\n"
     ]
    }
   ],
   "source": [
    "from optimus import optimus as Optimus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Name': {'int': 0,\n",
       "  'decimal': 0,\n",
       "  'string': 0,\n",
       "  'date': 0,\n",
       "  'boolean': 0,\n",
       "  'binary': 0,\n",
       "  'array': 0,\n",
       "  'object': 4,\n",
       "  'null': 0,\n",
       "  'missing': 0},\n",
       " 'Age': {'int': 0,\n",
       "  'decimal': 0,\n",
       "  'string': 0,\n",
       "  'date': 0,\n",
       "  'boolean': 0,\n",
       "  'binary': 0,\n",
       "  'array': 0,\n",
       "  'object': 0,\n",
       "  'null': 0,\n",
       "  'missing': 0}}"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = [['tom', 10], ['nick', 15], ['juli', 0],['argenis', 10]]\n",
    "\n",
    "# Create the pandas DataFrame \n",
    "df = pd.DataFrame(data, columns = ['Name', 'Age']) \n",
    "df.cols.count_by_dtypes(\"*\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>tom</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>nick</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>juli</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>argenis</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Name  Age\n",
       "0      tom   10\n",
       "1     nick   15\n",
       "2     juli    0\n",
       "3  argenis   10"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df[df == 0].count(axis=0)\n",
    "# Speedy https://stackoverflow.com/questions/35277075/python-pandas-counting-the-occurrences-of-a-specific-value\n",
    "def zeros(series):\n",
    "    return (series.values==0).sum()\n",
    "\n",
    "zeros(df[\"Age\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0.1: 3.0000000000000004, 0.5: 10.0}"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def percentile_agg(col_name, values):  \n",
    "    return col_name.quantile(values).to_dict()\n",
    "    \n",
    "\n",
    "percentile_agg(df[\"Age\"], [0.1, 0.5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'str' object has no attribute 'quantile'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-136-2dcbdc92d217>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[1;31m#         'Age': \"count\",\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m         \u001b[1;31m# min, first, and number of unique dates per group\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m         \u001b[1;34m'Age'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'first'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'nunique'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpercentile_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Age\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0.5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      9\u001b[0m     }\n\u001b[0;32m     10\u001b[0m ).to_dict()\n",
      "\u001b[1;32m<ipython-input-135-baf44f890966>\u001b[0m in \u001b[0;36mpercentile_agg\u001b[1;34m(col_name, values)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mpercentile_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mquantile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mpercentile_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Age\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0.5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'str' object has no attribute 'quantile'"
     ]
    }
   ],
   "source": [
    "df.agg(\n",
    "    {\n",
    "#         # find the min, max, and sum of the duration column\n",
    "#         'Name': [min, max, sum],\n",
    "#          # find the number of network type entries\n",
    "#         'Age': \"count\",\n",
    "        # min, first, and number of unique dates per group\n",
    "        'Age': [min, 'first', 'nunique', percentile_agg(\"Age\",[0.1, 0.5])]\n",
    "    }\n",
    ").to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Name Age\n",
      "0   tom  10\n",
      "1  nick  15\n",
      "2  juli   0\n"
     ]
    }
   ],
   "source": [
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<optimus.pandas.functions.functions.<locals>.Functions object at 0x000001F961E9E438>\n",
      "<optimus.pandas.functions.functions.<locals>.Functions object at 0x000001F961E7EF60>\n",
      "[(<function functions.<locals>.Functions.count_uniques_agg at 0x000001F961EED1E0>, ('Name', True)), (<function functions.<locals>.Functions.count_uniques_agg at 0x000001F961EED1E0>, ('Age', True))]\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'str' object has no attribute 'value_counts'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-138-02af911c93ea>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_agg\u001b[1;34m(df, columns, buckets, relative_error, approx_count)\u001b[0m\n\u001b[0;32m    371\u001b[0m             \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    372\u001b[0m             \u001b[0mfuncs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount_uniques_agg\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 373\u001b[1;33m             \u001b[0mexprs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    374\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    375\u001b[0m             \u001b[1;31m# TODO: in basic calculations only funcs = [F.min, F.max]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\pandas\\columns.py\u001b[0m in \u001b[0;36mcreate_exprs\u001b[1;34m(columns, funcs, *args)\u001b[0m\n\u001b[0;32m    252\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0m_exprs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    253\u001b[0m             \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 254\u001b[1;33m             \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_agg_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexprs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    255\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    256\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\pandas\\columns.py\u001b[0m in \u001b[0;36m_agg_exprs\u001b[1;34m(_funcs)\u001b[0m\n\u001b[0;32m    242\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    243\u001b[0m                     \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0m_filter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_col_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_func\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 244\u001b[1;33m                         \u001b[0magg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_func\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0m_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    245\u001b[0m                         \u001b[1;32mif\u001b[0m \u001b[0magg\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    246\u001b[0m                             \u001b[0mfunc_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_beautify_col_names\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_func\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\pandas\\functions.py\u001b[0m in \u001b[0;36mcount_uniques_agg\u001b[1;34m(col_name, estimate)\u001b[0m\n\u001b[0;32m     68\u001b[0m         \u001b[1;33m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     69\u001b[0m         \u001b[1;32mdef\u001b[0m \u001b[0mcount_uniques_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcol_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mestimate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 70\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mcol_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     71\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     72\u001b[0m         \u001b[1;31m# @staticmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'str' object has no attribute 'value_counts'"
     ]
    }
   ],
   "source": [
    "from optimus.profiler.profiler import Profiler\n",
    "p = Profiler()\n",
    "print(df.functions)\n",
    "p.columns_agg(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<optimus.pandas.functions.functions.<locals>.Functions object at 0x0000021950468C50>\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'Functions' object has no attribute 'zeros_agg'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-26-e792527df240>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0moptimus\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprofiler\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\helpers\\decorators.py\u001b[0m in \u001b[0;36mtimed\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m      8\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mtimed\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      9\u001b[0m         \u001b[0mstart_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m         \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     11\u001b[0m         \u001b[0m_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mround\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     12\u001b[0m         \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"{name}() executed in {time} sec\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0m_time\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mrun\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count)\u001b[0m\n\u001b[0;32m    119\u001b[0m         \u001b[1;31m# df.ext.set_meta({\"initialized\": True})\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    120\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 121\u001b[1;33m         \u001b[0moutput\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuckets\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"dict\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    122\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    123\u001b[0m         \u001b[1;31m# Load jinja\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mdataset\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count, sample, stats, format)\u001b[0m\n\u001b[0;32m    246\u001b[0m         \u001b[1;31m# if [\"drop\", \"rename\"] not in trans and self.already_run is False:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    247\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mstats\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 248\u001b[1;33m             \u001b[0moutput_columns\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns_stats\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuckets\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    250\u001b[0m         \u001b[0massign\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput_columns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"name\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_stats\u001b[1;34m(self, df, columns, buckets, infer, relative_error, approx_count)\u001b[0m\n\u001b[0;32m    331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    332\u001b[0m         \u001b[1;31m# Aggregation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 333\u001b[1;33m         \u001b[0mstats\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mProfiler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns_agg\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuckets\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrelative_error\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    334\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    335\u001b[0m         \u001b[1;31m# Calculate Frequency\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\profiler\\profiler.py\u001b[0m in \u001b[0;36mcolumns_agg\u001b[1;34m(df, columns, buckets, relative_error, approx_count)\u001b[0m\n\u001b[0;32m    371\u001b[0m             \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    372\u001b[0m             \u001b[0mfuncs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunctions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcount_uniques_agg\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 373\u001b[1;33m             \u001b[0mexprs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_exprs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcols\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mapprox_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    374\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    375\u001b[0m             \u001b[1;31m# TODO: in basic calculations only funcs = [F.min, F.max]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Documents\\Optimus\\optimus\\pandas\\columns.py\u001b[0m in \u001b[0;36mcreate_exprs\u001b[1;34m(columns, funcs, *args)\u001b[0m\n\u001b[0;32m    204\u001b[0m             \u001b[1;31m# Std, kurtosis, mean, skewness and other agg functions can not process date columns.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    205\u001b[0m             filters = {\"date\": [F.stddev, F.kurtosis, F.mean, F.skewness, F.sum, F.variance, F.approx_count_distinct,\n\u001b[1;32m--> 206\u001b[1;33m                                 self.functions.zeros_agg],\n\u001b[0m\u001b[0;32m    207\u001b[0m                        \"array\": [F.stddev, F.kurtosis, F.mean, F.skewness, F.sum, F.variance, F.approx_count_distinct,\n\u001b[0;32m    208\u001b[0m                                  self.functions.zeros_agg],\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'Functions' object has no attribute 'zeros_agg'"
     ]
    }
   ],
   "source": [
    "from optimus.profiler.profiler import Profiler\n",
    "p = Profiler()\n",
    "p.run(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Name': 'object', 'Age': 'int64'}"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cols.dtypes()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Name': 'object', 'Age': 'int64'}\n"
     ]
    }
   ],
   "source": [
    "print({k:str(v) for k,v in dict(df.dtypes).items()})\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
